In [10]:
import os, sys
import graphlab as gl
import graphlab.aggregate as agg
from tqdm import tqdm_notebook as tqdm
# set canvas path
# gl.canvas.set_target('ipynb')
%matplotlib inline
import matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [3]:
sales = graphlab.SFrame('data/home_data.gl/')
In [4]:
sales
Out[4]:
In [21]:
# Scatter of living area against sale price; fit_reg=False leaves out the
# regression line.
sales_df = sales.to_dataframe()
sns.lmplot(x='sqft_living', y='price', data=sales_df, fit_reg=False)
Out[21]:
In [22]:
train_dataset, test_dataset = sales.random_split(.8, seed=0)
In [23]:
sqft_model = gl.linear_regression.create(train_dataset, target='price', features=['sqft_living'])
In [24]:
print test_dataset['price'].mean()
In [25]:
print sqft_model.evaluate(test_dataset)
In [28]:
# Compare held-out prices (dots) with the single-feature model's
# predictions (line) over living area.
plt.figure(num=1, figsize=(15, 10), dpi=80)
axis_to_work = plt
axis_to_work.plot(
    test_dataset['sqft_living'], test_dataset['price'], '.',
    test_dataset['sqft_living'], sqft_model.predict(test_dataset), '-'
)
# Remove the top/right spines BEFORE rendering. despine() acts on the current
# figure, and once show() has rendered the plot a later call no longer
# affects what was displayed.
sns.despine(top=True, right=True)
axis_to_work.show()
In [29]:
# Display the 'coefficients' entry of the single-feature model.
sqft_model.get('coefficients')
Out[29]:
In [30]:
# Columns used as predictors by the multi-feature price model.
my_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
In [37]:
# Scatter price against every candidate feature (no regression line), one
# small figure per feature.
# The SFrame -> pandas conversion does not depend on the loop variable, so it
# is done once up front instead of once per feature.
sales_df = sales.to_dataframe()
for feature in my_features:
    # NOTE(review): newer seaborn releases renamed `size` to `height` —
    # confirm against the installed version before changing it.
    sns.lmplot(
        x=feature,
        y='price',
        data=sales_df,
        fit_reg=False,
        size=3
    )  # No regression line
In [38]:
my_features_model = graphlab.linear_regression.create(train_dataset, target='price', features=my_features)
In [39]:
print sqft_model.evaluate(test_dataset)
print my_features_model.evaluate(test_dataset)
In [40]:
house1 = sales[sales['id'] == '5309101200']
In [41]:
house1
Out[41]:
In [42]:
print house1['price']
In [43]:
print sqft_model.predict(house1)
In [44]:
print my_features_model.predict(house1)
In [45]:
house2 = sales[sales['id'] == '1925069082']
In [46]:
house2
Out[46]:
In [47]:
print house2['price']
In [48]:
print sqft_model.predict(house2)
In [49]:
print my_features_model.predict(house2)
In [50]:
# It was Bill Gates's house.
In [55]:
house3 = sales[sales['id']=='5309101200']
In [56]:
house3
Out[56]:
In [57]:
print house2['price']
In [58]:
print sqft_model.predict(house2)
In [59]:
print my_features_model.predict(house2)